org 100h

; ---------------------------------------------------------------------
; Start-up (DOS .COM, 16-bit real mode).
; Switches to the VESA mode and then sets every register the rest of
; the program takes for granted:
;   si = 0x100   [bp+si] (offset 0x300) is the integer<->FPU scratch cell
;   cx = 0       the loops only ever load cl, so ch has to be zero
;   bp = 0x200   base of the d()/w() variable macros below
;   es = 0xa000  segment that stosw writes the pixels to
; ---------------------------------------------------------------------
  mov ax, 0x4f02        ; VBE function 02h: set video mode
  mov bx, 0x117         ; 1024x768 16-bit mode
  int 10h
  cmp ax, 0x004f        ; VBE status: al=4Fh function supported, ah=0 success
  je VBE_OK
  ret                   ; mode refused: leave the same way the program exits at the end
VBE_OK:

  mov si, 0x100         ; set explicitly instead of trusting the DOS entry value
  xor cx, cx            ; likewise for ch

  mov bp, 0x200
; d()/w() address a dword/word variable with a one-byte displacement from
; bp, so every variable has to lie within -128..+127 bytes of offset 0x200.
%define d(xx) dword[byte bp+xx-0x200]
%define w(xx) word[byte bp+xx-0x200]

  push 0xa000
  pop es

  fninit

M:
  ; ---- once per frame -------------------------------------------------
  ; T  += DELTA_T
  ; T3  = T * log2(e)          T2 = T * ln(2)
  ; C   = cos(T) * scale       S  = sin(T) * scale
  ;       with scale = 1 - cos(T2)/10
  fldln2                  ;; ln2
  fldl2e                  ;; log2e ln2            (1.4427 0.6931)

  fld    d(T)             ;; t log2e ln2
  fadd   d(DELTA_T)       ;; t+delta_t . .
  fst    d(T)             ; keep the new time for the next frame

  fmul   st1, st0
  fmul   st2, st0         ;; t t3 t2
  fsincos                 ;; cos(t) sin(t) t3 t2

  fld    st3              ;; t2 cos sin t3 t2
  fcos
  fidiv  w(C10)           ;; cos(t2)/10 . . . .
  fisubr w(C1)            ;; scale cos sin t3 t2
  fmul   st1, st0
  fmulp  st2, st0         ;; C S t3 t2
  fstp   d(C)
  fstp   d(S)
  fstp   d(T3)
  fstp   d(T2)            ; all four values popped into their variables

  xor dx, dx            ; dx = bank (page) number, counts 0..23
P:
  ; Map bank dx into the window through VBE function 05h. This runs once
  ; per bank; a bank is taken to be 64 KiB (granularity is assumed, not
  ; queried).
  mov ax, 4F05h
  xor bx, bx            ; bh = 0, bl = 0 (window 0); dx = bank
  int 10h

  xor di, di            ; di = byte offset of the pixel inside the bank

             ; x: 0..1024

; dx:di address
; ax,bx scratch
; cx loop
; bp vars
; si 0x0100
Q:
  ; ---- one output pixel; dx:di is its byte address --------------------
  fldz
  fldz
  fldz                    ;; R=0 G=0 B=0

  ; y = (dx:di >> 11) - 384, then divided by 384
  mov   ax, di
  shrd  ax, dx, 11        ; 2048 bytes per row: ax = row number
  sub   ax, 384           ; ax = y[-384..383]
  mov   [bp+si], ax
  fild  word [bp+si]
  fidiv w(C384)           ;; y[-1..1] R G B

  ; x = ((di >> 1) & 1023) - 512, divided by the same 384
  mov   ax, di
  shr   ax, 1             ; two bytes per pixel
  and   ax, 0x3ff
  sub   ax, 512           ; ax = x[-512..511]
  mov   [bp+si], ax
  fild  word [bp+si]
  fidiv w(C384)           ;; x[-1.33..1.33] y[-1..1] R G B

  ; D = length(x,y) / 2
  stc                     ; LEN is entered with CF=1
  call  LEN               ;; length(x,y) x y R G B
  fmul  d(CHALF)          ;; d x y R G B
  fstp  d(D)              ;; x y R G B

  ; Accumulation loop: 16 passes with cl = 16 down to 1. The whole of cx is
  ; stored and loaded as i further down, so ch has to be 0 already.
  ; CF is 1 at the top of every pass: LEN returns with CF=1 before the first
  ; pass, and the "jnc A" loop is left with CF=1 before each later one.
  ; FOLD, LEN and DUP_INC all depend on that and all leave CF at 1 again.
  mov cl,16     ; cx = i
I:
; rotate and scale
  ;[x] = [C -S]*[x]
  ;[y]   [S  C] [y]
  ; C and S are the per-frame values stored at the top of the M loop.

  fld st1
  fld st1        ;; x y x y R G B

  fld d(S)       ;; S x y x y R G B
  fmul st1,st0
  fmulp st2,st0  ;; Sx Sy x y R G B

  fld d(C)       ;; C Sx Sy x y R G B
  fmul st3,st0
  fmulp st4,st0  ;; Sx Sy Cx Cy R G B

  faddp st3,st0  ;; Sy Cx y=Sx+Cy R G B
  fsubp st1,st0  ;; x=Cx-Sy y=Sx+Cy R G B

; square fold for now
; The body runs exactly twice: entered with CF=1, the first cmc clears CF so
; jnc loops, the second cmc sets it again so jnc falls through. fxch brings
; y to the top for the second pass and x back to the top afterwards.
FOLD:
  fsub d(CHALF) ;; x=x-0.5 y=y-0.5 R G B
  fist dword[bp+si]   ; rounded copy of st0 in the scratch cell
  fisub dword[bp+si]  ; st0 -= that rounded copy
  fxch st1
  cmc
  jnc FOLD      ;; x=x-round(x) y=y-round(y) R G B   (x, y taken after the -0.5)

  ; interfering concentric circles
  call LEN      ; entered with CF=1 (left by FOLD), returns with CF=1
  fimul w(C5)   ;; 5*length(x,y) x y R G B
  fadd d(D)
  fsub d(T3)
  fcos
  fidivr w(C1)  ;; k=1/cos(d - t3 + 5*length(x,y)) x y R G B
  fstp d(K)    ;; x y R G B


  ; RGB += k * ( 0.5 + cos(3*(i/100 - d + t2) + [2 1 0]) );
  mov [bp+si],cx
  fild word[bp+si]
  fidiv w(C100) ;; i/100 x y R G B
  fsub d(D)
  fadd d(T2)
  fimul w(C3)   ;; q=3*(i/100 - d + t2) x y R G B

; Same two-pass cmc/jnc pattern as FOLD: each pass pushes 1.0 and adds the
; value beneath it, giving q+1 and then q+2. The FPU stack is 8 deep here.
DUP_INC:
  fld1
  fadd st1
  cmc
  jnc DUP_INC   ;; q+2 q+1 q+0 x y R G B

; Three passes, one per colour channel. Each pass pops its phase value, so
; st5 is R on the first pass, G on the second and B on the third.
; cl starts at 1..16, so only the third "add cl,85" carries; that ends the
; loop with cl one lower than before and ZF set once cl has reached 0.
A:
  fcos          ;; cos(q) . . x y R G B
  fadd d(CHALF) ;; cos(q)+0.5 . . x y R G B
  fmul d(K)     ;; k*(cos(q)+0.5) . . x y R G B
  faddp st5,st0 ;; . . x y R+=k*(cos(q)+0.5) G B
  add cl,85     ; +85, +170, +255 (= -1)
  jnc A         ;; x y [R G B]+=k*(cos(q+[2 1 0])+0.5)
  ; cf=1

  jnz I         ; ZF still comes from the last add cl,85: repeat until cl == 0

  fcompp                  ;; R G B          (x and y popped)

  ; Each channel becomes min(31, value*value/64); squaring the sum gives
  ; better contrast. The three 5-bit results are shifted into ax in the
  ; order R, G, B.
  mov  cl, 3
COL:
  fmul st0                ;; v*v . .
  fistp word [bp+si]
  mov  bx, [bp+si]
  sar  bx, 6              ; v*v / 64
  cmp  bx, 31
  jb   .in_range          ; unsigned compare
  mov  bl, 31             ; clamp; only bl is used below
.in_range:
  shl  ax, 5
  add  al, bl             ; .rrr|rrgg|gggb|bbbb   after the third pass
  loop COL

  ; 15-bit -> 16-bit layout: shift everything up one bit, then pull blue
  ; back down (2*b - b = b), which leaves the lowest green bit clear.
  shl  ax, 1              ; rrrr|rggg|ggbb|bbb.
  sub  al, bl             ; rrrr|rggg|gg.b|bbbb

  stosw
  stosw                   ; same colour for two adjacent pixels: 2x faster

  ; ---- loop tails -------------------------------------------------------
  test di, di             ; di wrapped round to 0: this bank is full
  jnz  Q

  inc  dx                 ; next bank
  cmp  dl, 24             ; 768 rows / 32 rows per bank
  jb   P

  ; ---- frame finished: poll the keyboard once ---------------------------
  in   al, 60h            ; ESC check
  cmp  al, 1
  jne  M                  ; anything else: render the next frame

  ; ---- leave ------------------------------------------------------------
  mov  ax, 0x0003         ; text mode
  int  10h
  ret

; ---------------------------------------------------------------------
; LEN - Euclidean length of the vector held in st0/st1.
;   In:    st0 = x, st1 = y (anything below is left alone)
;   Out:   st0 = sqrt(x*x + y*y), st1 = x, st2 = y
;          CF = 1 (the FOLD and DUP_INC loops that follow the calls
;          start from CF=1)
;   Uses:  two extra FPU stack slots while it runs
; Written as straight-line code, so the result no longer depends on the
; state of CF at entry.
; ---------------------------------------------------------------------
LEN: ;; x y -> r=sqrt(x*x+y*y) x y  ; cf=1 on return
  fld st1        ;; y x y
  fmul st0       ;; y*y x y
  fld st1        ;; x y*y x y
  fmul st0       ;; x*x y*y x y
  faddp st1,st0  ;; x*x+y*y x y
  fsqrt          ;; r x y
  stc            ; keep the CF=1 exit state the callers continue with
  ret



; 16-bit integer operands for the FPU integer instructions
; (fidiv / fidivr / fimul / fisubr). They are addressed through w(), so
; they have to stay within the one-byte displacement range around bp.
C1    dw 1      ; 1 - cos(t2)/10  and  1/cos(...)
C3    dw 3      ; colour phase factor
C5    dw 5      ; ring frequency
C10   dw 10     ; divisor of cos(t2) in the scale factor
C100  dw 100    ; i/100
C384  dw 384    ; pixel -> unit coordinates

; 32-bit floats, addressed through d()
CHALF dd 0.5

DELTA_T dd 0.5  ; added to T once per frame
T       dd 0.0  ; running time, written back every frame

section .bss

; Uninitialised 32-bit float variables, addressed through d().
; Each one is stored with fstp before the code reads it.
C:  resd 1      ; cos(t) * scale        (per frame)
S:  resd 1      ; sin(t) * scale        (per frame)
T3: resd 1      ; t * log2(e)           (per frame)
T2: resd 1      ; t * ln(2)             (per frame)
D:  resd 1      ; length(x,y) / 2       (per pixel)
K:  resd 1      ; 1 / cos(...)          (per iteration)
